import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
import random


# Random walk over Baidu Baike: starting from one article, repeatedly follow a
# randomly chosen "/item/" link found on the current page, and step back one
# page whenever the current page offers no such link.
base_url = 'http://www.baike.baidu.com'
# Stack of relative article paths visited so far; the last entry is the page
# fetched next. The seed is the percent-encoded UTF-8 path of the
# "web crawler" article.
his = ['/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB']

# Loop-invariant pattern, built once: matches any href containing "/item/".
item_link = re.compile("/item/")

for _ in range(20):
    if not his:
        # Every page on the path (including the seed) was a dead end and has
        # been popped; indexing his[-1] would raise IndexError, so stop here.
        break

    url = base_url + his[-1]
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(url) as response:
        html = response.read().decode("utf-8")
    soup = BeautifulSoup(html, features="lxml")

    # Candidate next hops: anchors that open in a new tab and point at an
    # "/item/" path.
    sub_urls = soup.find_all("a", {"target": "_blank", "href": item_link})

    if sub_urls:
        # Move forward to a randomly picked linked article.
        his.append(random.choice(sub_urls)['href'])
    else:
        # Dead end: drop the current page so the next iteration retries from
        # the previous one.
        his.pop()
